knitr::opts_chunk$set(echo = TRUE)
# Set working directory
setwd("C:\\Users\\ranch\\OneDrive\\cabi-trip-history-data")
# Remove exisiting data
rm(list=ls(all=TRUE))
# Load libraries
library(ggplot2)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(scales)
library(plyr)
##
## Attaching package: 'plyr'
## The following object is masked from 'package:lubridate':
##
## here
library(readr)
##
## Attaching package: 'readr'
## The following object is masked from 'package:scales':
##
## col_factor
library(ggmap)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggmap':
##
## wind
## The following objects are masked from 'package:plyr':
##
## arrange, mutate, rename, summarise
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
Link for Capital Bike Share program in Washington DC https://s3.amazonaws.com/capitalbikeshare-data/index.html
# Public data for Capital Bike Share program in Washington DC
load("FinalDataNew.rdata")
head(FinalDataNew)
## TotalDurationMs StartDate EndDate
## 1 51962000 2010-12-31 23:49:00 2011-01-01 14:15:00
## 2 514000 2010-12-31 23:37:00 2010-12-31 23:46:00
## 3 737000 2010-12-31 23:27:00 2010-12-31 23:39:00
## 4 953000 2010-12-31 23:21:00 2010-12-31 23:37:00
## 5 2179000 2010-12-31 23:20:00 2010-12-31 23:56:00
## 6 580000 2010-12-31 23:18:00 2010-12-31 23:28:00
## StartStationNumber StartStation EndStationNumber
## 1 31111 10th & U St NW 31111
## 2 31111 10th & U St NW 31202
## 3 31602 Park Rd & Holmead Pl NW 31401
## 4 31106 Calvert St & Woodley Pl NW 31401
## 5 31110 20th St & Florida Ave NW 31623
## 6 31107 Lamont & Mt Pleasant NW 31111
## EndStation BikeNumber AccountType
## 1 10th & U St NW W00771 Casual
## 2 14th & R St NW W01119 Registered
## 3 14th St & Spring Rd NW W00973 Registered
## 4 14th St & Spring Rd NW W00914 Registered
## 5 Columbus Circle / Union Station W00859 Casual
## 6 10th & U St NW W01119 Registered
dim(FinalDataNew) # the length of the data is 13,623,103
## [1] 13623103 9
ggplot(SummaryDateAndTime, aes(x = hour, y = N, colour = weekday)) +
geom_point(data = weekday_summary, aes(group = weekday)) +
geom_line(data = weekday_summary, aes(group = weekday)) +
scale_x_discrete("hour") +
scale_y_continuous("N") +
theme_minimal() +
ggtitle("Different demands in weekday and weekend.\n") +
theme(plot.title = element_text(hjust = 0.5)) +
theme(plot.title=element_text(size=18))
# Create a dataframe with StartStationNumber and Weekday
StartStationinWeek.df <- data.frame(FinalDataNew$StartStationNumber, SummaryDateAndTime$weekday)
colnames(StartStationinWeek.df) <- c('StartStationNumber', 'SummaryWeekday')
head(StartStationinWeek.df)
## StartStationNumber SummaryWeekday
## 1 31111 Friday
## 2 31111 Friday
## 3 31602 Friday
## 4 31106 Friday
## 5 31110 Friday
## 6 31107 Friday
weekends <- c('Saturday', 'Sunday') #define Sat and Sun as weekend
# If the day is Saturday or Sunday, labels the day as "weekend", other wise labels it as "weekday"
StartStationinWeek.df$SummaryWeekday <- factor((StartStationinWeek.df$SummaryWeekday %in% weekends),
levels=c(TRUE, FALSE), labels=c('weekend', 'weekday'))
colnames(StartStationinWeek.df) <- c('StartStationNumber', 'WeekdayOrWeekend')
head(StartStationinWeek.df)
## StartStationNumber WeekdayOrWeekend
## 1 31111 weekday
## 2 31111 weekday
## 3 31602 weekday
## 4 31106 weekday
## 5 31110 weekday
## 6 31107 weekday
library(plyr)
SummaryStartStationinWeek.df <- count(StartStationinWeek.df, c('StartStationNumber', 'WeekdayOrWeekend'))
head(SummaryStartStationinWeek.df)
## StartStationNumber WeekdayOrWeekend freq
## 1 31000 weekend 2149
## 2 31000 weekday 5928
## 3 31001 weekend 6289
## 4 31001 weekday 14583
## 5 31002 weekend 7229
## 6 31002 weekday 17676
SummaryStationWeekday.df <- subset(SummaryStartStationinWeek.df[c(1,3)],
SummaryStartStationinWeek.df$WeekdayOrWeekend == 'weekday')
SummaryStationWeekend.df <- subset(SummaryStartStationinWeek.df[c(1,3)],
SummaryStartStationinWeek.df$WeekdayOrWeekend == 'weekend')
# Scale the freq in weekday and weekend
SummaryStationWeekday.df$freq <- SummaryStationWeekday.df$freq/5
SummaryStationWeekend.df$freq <- SummaryStationWeekend.df$freq/2
# Sort the Station by freq (from high to low)
SummaryStationWeekday.df <- SummaryStationWeekday.df[order(-SummaryStationWeekday.df$freq),]
SummaryStationWeekend.df <- SummaryStationWeekend.df[order(-SummaryStationWeekend.df$freq),]
# Find the top 5 rental station in weekday
Top5StationInWeekday.df <- SummaryStationWeekday.df[1:5,]
TopStationInWeekday.df <- cbind(Top5StationInWeekday.df[1], 'Weekday')
colnames(TopStationInWeekday.df)[2] <- 'Week'
# Find the top 5 rental station in weekend
Top5StationInWeekend.df <- SummaryStationWeekend.df[1:5,]
TopStationInWeekend.df <- cbind(Top5StationInWeekend.df[1], 'Weekend')
colnames(TopStationInWeekend.df)[2] <- 'Week'
# Merge the top 5 rental staions in weekday and weekend
TopStationInWeek.df <- merge(TopStationInWeekday.df,TopStationInWeekend.df, by = 'StartStationNumber', all = TRUE)
TopStationInWeek.df$Week.x <- as.character(TopStationInWeek.df$Week.x)
# If the station in included in both weekday and weekend stations label it as top stations all week
TopStationInWeek.df$Week.x[!is.na(TopStationInWeek.df$Week.x) & !is.na(TopStationInWeek.df$Week.y)] <- 'AllWeek'
TopStationInWeek.df$Week.x[is.na(TopStationInWeek.df$Week.x)] <- 'Weekend'
TopStationInWeek.df <- TopStationInWeek.df[1:2]
colnames(TopStationInWeek.df)[2] <- 'TopRentalStations'
TopStationInWeek.df
## StartStationNumber TopRentalStations
## 1 31101 Weekend
## 2 31200 AllWeek
## 3 31201 AllWeek
## 4 31229 Weekday
## 5 31623 Weekday
## 6 31241 Weekday
## 7 31247 Weekend
## 8 31258 Weekend
p <- DCMap + geom_point(data=TopStationInWeek.df2, aes(x = long, y = lat, color = TopRentalStations), size=5, alpha=1)
p
# rm(list=ls(all=TRUE)) # remove previous data
load("DemandPredictionDataSet.rdata") # load dataframe for predictive model
head(DemandPredictionDataSet)
## StartStationNumber AccountType year month day weekday hour minute
## 1 31111 Casual 2010 12 31 Friday 23 49
## 2 31111 Registered 2010 12 31 Friday 23 37
## 3 31602 Registered 2010 12 31 Friday 23 27
## 4 31106 Registered 2010 12 31 Friday 23 21
## 5 31110 Casual 2010 12 31 Friday 23 20
## 6 31107 Registered 2010 12 31 Friday 23 18
## EndStationNumber end_year end_month end_day end_weekday end_hour
## 1 31111 2011 1 1 Saturday 14
## 2 31202 2010 12 31 Friday 23
## 3 31401 2010 12 31 Friday 23
## 4 31401 2010 12 31 Friday 23
## 5 31623 2010 12 31 Friday 23
## 6 31111 2010 12 31 Friday 23
# Count rental demand in each hour by start station
HourlyDemandPrediction.df <- count(DemandPredictionDataSet,c(1,3,4,5,7))
# Count rental demand in each hour by end station
HourlyDemandPrediction.end.df <- count(DemandPredictionDataSet,c(9,10,11,13,14))
TopStations <- count(DemandPredictionDataSet,c('StartStationNumber'))
TopStations$StartStationNumber <- as.numeric(as.character(TopStations$StartStationNumber))
# Sort the stations by frequency
TopStations <- TopStations[order(-TopStations$freq), ]
# Get top 10 stations
Top10Stations <- head(TopStations$StartStationNumber,10)
Top10Stations
## [1] 31200 31623 31201 31258 31247 31229 31214 31101 31241 31613
# Top X station
X <- 1
StationX <- Top10Stations[X]
# Create a dataset for the rental demand in Station X by hour and weekday
# Get the data only for Station X
StationX.df <- subset(DemandPredictionDataSet, DemandPredictionDataSet$StartStationNumber == StationX)
# Count the rental demand in Station X by hour
StationX.summary <- ddply(StationX.df,.(month,weekday,hour),summarise,N = length(hour))
StationX.summary.detail <- ddply(StationX.df,.(year,month,day,weekday,hour),summarise,N = length(hour))
# Count the returned bike in Station X by hour
EndStationX.df <- subset(DemandPredictionDataSet, DemandPredictionDataSet$EndStationNumber == StationX)
EndStationX.summary.detail <- ddply(EndStationX.df,.(end_year,end_month,end_day,end_weekday,end_hour),
summarise,N = length(end_hour))
colnames(EndStationX.summary.detail)[1:5] <- c('year','month','day','weekday','hour')
# Count the rental demand in Station X by hour and weekday
StationX.DayAndHour <- aggregate(N ~ weekday + hour, data = StationX.summary, FUN = sum)
# Visualize the rental demand in Station X by hour and weekday
ggplot(StationX.DayAndHour, aes(x = as.factor(hour), y = N, colour = weekday)) +
geom_point(data = StationX.DayAndHour, aes(group = weekday)) +
geom_line(data = StationX.DayAndHour, aes(group = weekday)) +
scale_x_discrete("hour") +
scale_y_continuous("N") +
theme_minimal() +
ggtitle(sprintf("Rental Demand in Station %d", Top10Stations[X])) +
theme_minimal() +
theme(plot.title=element_text(size=18))+
theme(plot.title = element_text(hjust = 0.5))
library(plotly)
# Get the rental demand of station X on Mondays at each hour and each month
Demand <- xtabs(N ~ hour + month, data = subset(StationX.summary, StationX.summary$weekday == 'Monday'))
plot_ly(x = 1:12, z = Demand, type = "surface") %>%
layout(scene = list(title = "Rental Demand on Saturdays in Station 31623, x = month, y= hour, z1 = count", xaxis = list(title = "Month"), yaxis = list(title = "Hour"), zaxis = list(title = "Demand")))
# Stations that use 12th polynomial model
Model1Station <- c(31623, 31201, 31258, 31247, 31229, 31214, 31101, 31613)
# Stations that use 10th polynomial model
Model2Station <- c(31200, 31241)
# 12th polynomial model for weekday
weekdayPolyModel1 <- function(input_dataset){
dataset.out.weekday <- input_dataset
dataset.out.weekday$year_p2 <- dataset.out.weekday$year^2
dataset.out.weekday$month_p2 <- dataset.out.weekday$month^2
dataset.out.weekday$hour_p2 <- dataset.out.weekday$hour^2
dataset.out.weekday$hour_p3 <- dataset.out.weekday$hour^3
dataset.out.weekday$hour_p4 <- dataset.out.weekday$hour^4
dataset.out.weekday$hour_p5 <- dataset.out.weekday$hour^5
dataset.out.weekday$hour_p6 <- dataset.out.weekday$hour^6
dataset.out.weekday$hour_p7 <- dataset.out.weekday$hour^7
dataset.out.weekday$hour_p8 <- dataset.out.weekday$hour^8
dataset.out.weekday$hour_p9 <- dataset.out.weekday$hour^9
dataset.out.weekday$hour_p10 <- dataset.out.weekday$hour^10
dataset.out.weekday$hour_p11 <- dataset.out.weekday$hour^11
dataset.out.weekday$hour_p12 <- dataset.out.weekday$hour^12
# Fitting the polynomial regression
regressor.weekday <- lm(formula = out ~ year+year_p2+month+month_p2+hour+hour_p2+hour_p3+hour_p4
+hour_p5+hour_p6+hour_p7+hour_p8+hour_p9+hour_p10+hour_p11+hour_p12,
data = dataset.out.weekday)
# Function returns the regressor for weekday
return(regressor.weekday)
}
# 10th polynomial model for weekday
weekdayPolyModel2 <- function(input_dataset){
dataset.out.weekday <- input_dataset
dataset.out.weekday$year_p2 <- dataset.out.weekday$year^2
dataset.out.weekday$month_p2 <- dataset.out.weekday$month^2
dataset.out.weekday$hour_p2 <- dataset.out.weekday$hour^2
dataset.out.weekday$hour_p3 <- dataset.out.weekday$hour^3
dataset.out.weekday$hour_p4 <- dataset.out.weekday$hour^4
dataset.out.weekday$hour_p5 <- dataset.out.weekday$hour^5
dataset.out.weekday$hour_p6 <- dataset.out.weekday$hour^6
dataset.out.weekday$hour_p7 <- dataset.out.weekday$hour^7
dataset.out.weekday$hour_p8 <- dataset.out.weekday$hour^8
dataset.out.weekday$hour_p9 <- dataset.out.weekday$hour^9
dataset.out.weekday$hour_p10 <- dataset.out.weekday$hour^10
# Fitting the polynomial regression
regressor.weekday <- lm(formula = out ~ year+year_p2+month+month_p2+hour+hour_p2+hour_p3+hour_p4+
hour_p5+hour_p6+hour_p7+hour_p8+hour_p9+hour_p10,
data = dataset.out.weekday)
return(regressor.weekday)
}
# 4th polynomial model for weekend
weekendPolyModel <- function(input_dataset){
dataset.out.weekday <- input_dataset
dataset.out.weekend$year_p2 <- dataset.out.weekend$year^2
dataset.out.weekend$month_p2 <- dataset.out.weekend$month^2
dataset.out.weekend$hour_p2 <- dataset.out.weekend$hour^2
dataset.out.weekend$hour_p3 <- dataset.out.weekend$hour^3
dataset.out.weekend$hour_p4 <- dataset.out.weekend$hour^4
# Fitting the polynomial regression
regressor.weekend <- lm(formula = out ~ year+year_p2+month+month_p2+
hour+hour_p2+hour_p3+hour_p4,
data = dataset.out.weekend)
# Function returns the regressor for weekday
return(regressor.weekend)
}
# Function to select the model for weekday prediction
predictStationXinWeekday <- function(SelectStation){
# If the station number is in Model1 then use Model1 (12th poly)
if (SelectStation %in% Model1Station){
RegressorWeekday = weekdayPolyModel1(dataset.out.weekday)
}
# If the station number is in Model2 then use Model2 (10th poly)
if (SelectStation %in% Model2Station){
RegressorWeekday = weekdayPolyModel2(dataset.out.weekday)
}
return(RegressorWeekday)
}
# Function to use model (4th poly) for weekend prediction
predictStationXinWeekend <- function(SelectStation){
RegressorWeekend = weekendPolyModel(dataset.out.weekend)
return(RegressorWeekend)
}
# Function to create the predicted dataset in weekday
predictedWeekdayDemand <- function(SelectStation,Year,Month,Hour){
# Select the model 1 (12th poly)
if (SelectStation %in% Model1Station){
modeldata <- data.frame(year = Year, year_p2 = Year^2, month = Month, month_p2 = Month^2,
hour = Hour, hour_p2 = Hour^2, hour_p3 = Hour^3, hour_p4 = Hour^4,
hour_p5 = Hour^5, hour_p6 = Hour^6, hour_p7 = Hour^7, hour_p8 = Hour^8,
hour_p9 = Hour^9, hour_p10 = Hour^10, hour_p11 = Hour^11, hour_p12 = Hour^12)
}
# Select the model 2 (10th poly)
if (SelectStation %in% Model2Station){
modeldata <- data.frame(year = Year, year_p2 = Year^2, month = Month, month_p2 = Month^2,
hour = Hour, hour_p2 = Hour^2, hour_p3 = Hour^3, hour_p4 = Hour^4,
hour_p5 = Hour^5, hour_p6 = Hour^6, hour_p7 = Hour^7, hour_p8 = Hour^8,
hour_p9 = Hour^9, hour_p10 = Hour^10)
}
# Create the predicted dataset in weekday
predictedWeekdayData = cbind.data.frame(hour = Hour,
demand = predict(myPolyModelWeekday,newdata = modeldata))
# Return the predicted dataset in weekday
return(predictedWeekdayData)
}
# Function to create the predicted dataset in weekend
predictedWeekendDemand <- function(SelectStation,Year,Month,Hour){
# Use 4th poly model
modeldata <- data.frame(year = Year, year_p2 = Year^2, month = Month, month_p2 = Month^2,
hour = Hour, hour_p2 = Hour^2, hour_p3 = Hour^3, hour_p4 = Hour^4)
# Create the predicted dataset in weekend
predictedWeekendData = cbind.data.frame(hour = Hour,
demand = predict(myPolyModelWeekend,newdata = modeldata))
# Return the predicted dataset in weekend
return(predictedWeekendData)
}
# Select station X
X <- 9
StationX <- Top10Stations[X]
# Create a dataset for the rental demand in Station X by hour and weekday
# Count the rental demand in Station X by hour
StationX.df <- subset(DemandPredictionDataSet, DemandPredictionDataSet$StartStationNumber == StationX)
# Count the rental demand in Station X by hour
StationX.summary <- ddply(StationX.df,.(month,weekday,hour),summarise,N = length(hour))
StationX.summary.detail <- ddply(StationX.df,.(year,month,day,weekday,hour),summarise,N = length(hour))
# Count the returned bike in Station X by hour
EndStationX.df <- subset(DemandPredictionDataSet, DemandPredictionDataSet$EndStationNumber == StationX)
EndStationX.summary.detail <- ddply(EndStationX.df,.(end_year,end_month,end_day,end_weekday,end_hour),
summarise,N = length(end_hour))
colnames(EndStationX.summary.detail)[1:5] <- c('year','month','day','weekday','hour')
StationX.demand.detail <- merge.data.frame(StationX.summary.detail, EndStationX.summary.detail,
by = c('year','month','day','weekday','hour'),
all = TRUE)
StationX.demand.detail[is.na(StationX.demand.detail)] <- 0
colnames(StationX.demand.detail)[6:7] <- c('out','In')
StationX.demand.detail$netout <- StationX.demand.detail$out - StationX.demand.detail$In
dataset.out <- StationX.demand.detail[c(1:5,6)]
# Create the dataset for weekday and weekend
weekend.position <- dataset.out$weekday == 'Saturday' | dataset.out$weekday == 'Sunday'
dataset.out.weekday <- subset(dataset.out, weekend.position == FALSE)
dataset.out.weekend <- subset(dataset.out, weekend.position == TRUE)
PredictYear <- 2015
PredictMonth <- 6
hour_grid <- seq(0, 23, 0.1)
# Predict weekday rental demand in Station X
myPolyModelWeekday <- predictStationXinWeekday(StationX)
summary(myPolyModelWeekday)
##
## Call:
## lm(formula = out ~ year + year_p2 + month + month_p2 + hour +
## hour_p2 + hour_p3 + hour_p4 + hour_p5 + hour_p6 + hour_p7 +
## hour_p8 + hour_p9 + hour_p10, data = dataset.out.weekday)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.4898 -2.5461 -0.3761 1.9984 28.9818
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.134e+06 6.294e+04 -18.01 <2e-16 ***
## year 1.126e+03 6.251e+01 18.01 <2e-16 ***
## year_p2 -2.794e-01 1.552e-02 -18.00 <2e-16 ***
## month 1.704e+00 3.494e-02 48.77 <2e-16 ***
## month_p2 -1.195e-01 2.608e-03 -45.83 <2e-16 ***
## hour -7.479e+00 7.435e-01 -10.06 <2e-16 ***
## hour_p2 1.317e+01 9.194e-01 14.32 <2e-16 ***
## hour_p3 -8.281e+00 4.476e-01 -18.50 <2e-16 ***
## hour_p4 2.462e+00 1.139e-01 21.61 <2e-16 ***
## hour_p5 -3.984e-01 1.710e-02 -23.30 <2e-16 ***
## hour_p6 3.794e-02 1.595e-03 23.79 <2e-16 ***
## hour_p7 -2.191e-03 9.348e-05 -23.43 <2e-16 ***
## hour_p8 7.552e-05 3.351e-06 22.54 <2e-16 ***
## hour_p9 -1.432e-06 6.708e-08 -21.34 <2e-16 ***
## hour_p10 1.149e-08 5.745e-10 20.01 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.262 on 23974 degrees of freedom
## Multiple R-squared: 0.4867, Adjusted R-squared: 0.4864
## F-statistic: 1624 on 14 and 23974 DF, p-value: < 2.2e-16
# Get the predicted demand data
predictedWeekdayData = predictedWeekdayDemand(StationX,PredictYear,PredictMonth,hour_grid)
# Get the actual deamnd data in the month and year
PointToPlot1 <- dataset.out.weekday$year == PredictYear & dataset.out.weekday$month == PredictMonth
plotData <- dataset.out.weekday[PointToPlot1, c(5,6)]
# Plot the boxplot of the actual data and lineplot of the predicted data
plotWeekday <- ggplot()+
geom_boxplot(data = plotData,aes(x=hour,y =out, group = hour))+
geom_line(data = predictedWeekdayData, aes(x=hour, y=demand), colour = 'blue',group = 1) +
ggtitle(paste("Weekday Rental Demand at Station ",StationX, " in ",PredictYear, "-", PredictMonth, sep = "")) +
xlab('Hour') +
ylab('Demand')+
theme(plot.title = element_text(hjust = 0.5))
plotWeekday
#Predict weekend rental demand in Station X
myPolyModelWeekend <- predictStationXinWeekend(StationX)
summary(myPolyModelWeekend)
##
## Call:
## lm(formula = out ~ year + year_p2 + month + month_p2 + hour +
## hour_p2 + hour_p3 + hour_p4, data = dataset.out.weekend)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.7333 -1.8686 -0.4087 1.3897 17.5835
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.406e+05 6.797e+04 -13.84 <2e-16 ***
## year 9.342e+02 6.750e+01 13.84 <2e-16 ***
## year_p2 -2.320e-01 1.676e-02 -13.84 <2e-16 ***
## month 1.263e+00 3.809e-02 33.15 <2e-16 ***
## month_p2 -9.131e-02 2.838e-03 -32.18 <2e-16 ***
## hour -1.299e+00 8.157e-02 -15.93 <2e-16 ***
## hour_p2 3.418e-01 1.478e-02 23.13 <2e-16 ***
## hour_p3 -2.210e-02 9.520e-04 -23.22 <2e-16 ***
## hour_p4 4.223e-04 2.010e-05 21.01 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.876 on 9526 degrees of freedom
## Multiple R-squared: 0.3481, Adjusted R-squared: 0.3475
## F-statistic: 635.7 on 8 and 9526 DF, p-value: < 2.2e-16
# Get the predicted demand data
predictedWeekendData = predictedWeekendDemand(StationX,PredictYear,PredictMonth,hour_grid)
# Get the actual deamnd data in the month and year
PointToPlot1 <- dataset.out.weekend$year == PredictYear & dataset.out.weekend$month == PredictMonth
plotData <- dataset.out.weekend[PointToPlot1, c(5,6)]
# Plot the boxplot of the actual data and lineplot of the predicted data
plotWeekend <- ggplot()+
geom_boxplot(data = plotData,aes(x=hour,y =out, group = hour))+
geom_line(data = predictedWeekendData, aes(x=hour, y=demand), colour = 'blue',group = 1) +
ggtitle(paste("Weekend Rental Demand at Station ",StationX, " in ",PredictYear, "-", PredictMonth, sep = "")) +
xlab('Hour') +
ylab('Demand')+
theme(plot.title = element_text(hjust = 0.5))
plotWeekend